library(ggplot2)
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

LabLecture

# Load the Boulder social-media point shapefile as an sf object.
# NOTE(review): absolute, machine-specific path; this only runs on the author's machine.
boulder <- st_read(dsn = "/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp")
## Reading layer `BoulderSocialMedia' from data source 
##   `/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
# Print the sf object to inspect its fields, geometry type, bounding box, and CRS
boulder
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
## First 10 features:
##            id     DB   extent Climb_dist TrailH_Dis NatMrk_Dis Trails_dis
## 1  6517284333 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 2  6517281191 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 3  6517278961 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 4  6517276295 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 5  6517274727 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 6  6517272539 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 7  6517270109 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 8  6516904527 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 9  6516902971 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 10 6516900761 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
##    Bike_dis PrarDg_Dis PT_Elev Hydro_dis Street_dis                geometry
## 1  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 2  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 3  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 4  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 5  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 6  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 7  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 8  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 9  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 10 1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
# Quick look at the raw point locations, drawn semi-transparently
ggplot(data = boulder) +
  geom_sf(fill = NA, alpha = 0.2) +
  theme_bw()

# Reproject the points to EPSG:26753 and redraw the same map in the new CRS.
# `boulder` is overwritten, so every later plot uses the reprojected data.
boulder <- st_transform(boulder, 26753)
ggplot() +
  geom_sf(data = boulder, fill = NA, alpha = 0.2) +
  theme_bw()

# Same map, with each point coloured by its PT_Elev value
ggplot(data = boulder) +
  geom_sf(mapping = aes(colour = PT_Elev), fill = NA, alpha = 0.2) +
  theme_bw()

# Colour the points by PT_Elev using a 10-colour terrain palette
ggplot(data = boulder) +
  geom_sf(mapping = aes(colour = PT_Elev), fill = NA, alpha = 0.2) +
  scale_color_gradientn(colors = terrain.colors(n = 10)) +
  theme_bw()

# Flag points whose PT_Elev is at least 2200 and colour the map by that flag.
# The comparison already yields TRUE/FALSE (NA stays NA), so no ifelse() is needed.
boulder %>%
  mutate(high_elev = PT_Elev >= 2200) %>%
  ggplot() +
  geom_sf(aes(color = high_elev), fill = NA, alpha = 0.2) +
  theme_bw()

# Compare Street_dis between the 'Pano' and 'Flickr' sources.
# %in% keeps the set of sources in one place and never returns NA.
boulder %>%
  filter(DB %in% c("Pano", "Flickr")) %>%
  ggplot(aes(x = DB, y = Street_dis)) +
  geom_boxplot()

library(sf)
library(ggspatial)
library(viridis)
## Loading required package: viridisLite
## magma() returns a character vector of hexadecimal colours;
## the integer argument gives the number of colours to generate
magma(10)
##  [1] "#000004FF" "#180F3EFF" "#451077FF" "#721F81FF" "#9F2F7FFF" "#CD4071FF"
##  [7] "#F1605DFF" "#FD9567FF" "#FEC98DFF" "#FCFDBFFF"
# Colour the points by PT_Elev with a 10-colour magma gradient
ggplot(data = boulder) +
  geom_sf(mapping = aes(colour = PT_Elev), fill = NA, alpha = 0.2) +
  scale_color_gradientn(colors = magma(n = 10))

# Summarise the data-source column. DB is a character vector, so summary()
# reports only length/class/mode.
# NOTE(review): table(boulder$DB) would give per-source counts instead.
summary(boulder$DB)
##    Length     Class      Mode 
##     55519 character character
# Build a ggspatial map of the points coloured by data source (DB),
# then display it with the ColorBrewer "Dark2" qualitative palette
p <- ggplot() +
  annotation_spatial(data = boulder) +
  layer_spatial(data = boulder, mapping = aes(colour = DB))
p + scale_colour_brewer(palette = "Dark2")

library(tmap)
## 
## Attaching package: 'tmap'
## The following object is masked from 'package:datasets':
## 
##     rivers
# Set tmap to "plot" mode for the maps that follow
tmap_mode("plot")  
## ℹ tmap mode set to "plot".
## Add the data with tm_shape(), then a layer specific to vector or raster data.
## The layer chooses the mapped variable, its class intervals, palette, and
## other options. In tmap v4 the scale is passed through the layer's `*.scale`
## argument (it is not added with `+`), and the number of classes is `n`;
## `breaks` expects the actual break values.
tm_shape(boulder) +
  tm_symbols(
    col = "PT_Elev",
    col.scale = tm_scale_intervals(
      n = 5,
      style = "quantile",
      values = "brewer.yl_or_rd"
    ),
    size = 0.1
  )

## Here we use the simple `World` dataset that ships with tmap.
## The map uses tmap v4 syntax: the quantile classification goes in
## `fill.scale` and the legend title in `fill.legend`.
# tmap_mode("plot")
data("World")
tm_shape(World) +
  tm_polygons(
    fill = "gdp_cap_est",
    fill.scale = tm_scale_intervals(style = "quantile"),
    fill.legend = tm_legend(title = "GDP Per Capita Estimate")
  )

## Switch to "view" mode, which renders the following maps as interactive maps
tmap_mode("view")
## ℹ tmap mode set to "view".
# Same GDP-per-capita map, now rendered in the current tmap mode.
# Uses tmap v4 syntax (`fill.scale` / `fill.legend`) instead of the
# deprecated v3 `style` and `legend.title` arguments.
tm_shape(World) +
  tm_polygons(
    fill = "gdp_cap_est",
    fill.scale = tm_scale_intervals(style = "quantile"),
    fill.legend = tm_legend(title = "GDP Per Capita Estimate")
  )

Questions:

1. Discuss the advantages and challenges associated with an open data science approach. Provide an example based on this week’s reading. (1-2 paragraphs)

Open data science emphasizes transparency by enabling anyone to access, analyze, and reuse scientific observations, data, and results. This openness allows scientific claims to be validated by allowing other researchers to replicate and validate findings, which is essential for building trust in scientific conclusions. In addition, open data allows for the integration of datasets from multiple sources, resulting in new insights and discoveries that may not be possible with isolated datasets. For example, by integrating spatial data from the social media shape file with other environmental or population datasets, researchers can uncover patterns in urban development or environmental impacts that remain hidden without cross-referencing data.

However, open data science also brings challenges, including maintaining data privacy and ensuring that shared data is well documented and interoperable. Sensitive information must be protected, especially when dealing with personal data or vulnerable people. To reap the benefits of open data, researchers must ensure that data is formatted and annotated in a way that others can understand and use, which requires additional time and effort. For example, the social media shapefile dataset must contain clear documentation of any transformations in its structure, data fields, and applications to ensure that it can be effectively integrated and interpreted by other researchers, supporting reproducibility and potential new discoveries.

2. Create a markdown document that showcases an analysis of this week’s data or any other dataset of your choice. Include descriptive text that explains your analysis, and incorporate figures and geovisualizations. Include 1 chart and 1 map. Structure and explain your analysis with text, headings, highlights, images and other markdown basics.

Analysis of Boulder Social Media Data

This document showcases an analysis of Boulder social media data, focusing on visualizing elevation distribution and exploring the relationship between elevation and proximity to bike trails.

Importing and Inspecting Data

# Load required packages
library(ggplot2)
library(tmap)
library(sf)
library(dplyr)
library(viridis)
library(tmaptools)
# Read the shapefile from disk again, replacing any existing `boulder` object
# with a fresh copy in the file's own CRS
boulder <- st_read(dsn = "/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp")
## Reading layer `BoulderSocialMedia' from data source 
##   `/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
# Print the sf object to inspect its fields, geometry type, bounding box, and CRS
boulder
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
## First 10 features:
##            id     DB   extent Climb_dist TrailH_Dis NatMrk_Dis Trails_dis
## 1  6517284333 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 2  6517281191 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 3  6517278961 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 4  6517276295 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 5  6517274727 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 6  6517272539 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 7  6517270109 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 8  6516904527 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 9  6516902971 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
## 10 6516900761 Flickr 421678.2   1973.108   2368.567   2451.633   49.73422
##    Bike_dis PrarDg_Dis PT_Elev Hydro_dis Street_dis                geometry
## 1  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 2  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 3  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 4  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 5  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 6  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 7  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 8  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 9  1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)
## 10 1437.134   1942.125    2064   1359.75   193.9165 POINT (-786099 1929916)

Elevation Distribution Map

The following map uses the ggplot2 package (`geom_sf`) to visualize elevation across the Boulder area. With the magma palette, higher elevations are displayed in lighter shades, while lower elevations are in darker shades.

library(ggplot2)
library(viridis)

# Map the points coloured by PT_Elev on a continuous magma scale
ggplot(data = boulder) +
  geom_sf(mapping = aes(colour = PT_Elev), fill = NA, alpha = 0.7) +
  scale_color_viridis_c(option = "magma", name = "Elevation") +
  labs(title = "Elevation Map of Boulder") +
  theme_minimal()

library(ggplot2)
library(sf)
library(viridis)

# Add the point coordinates as plain x/y columns. These are in the layer's
# projected CRS units, not latitude/longitude. The coordinate matrix is
# computed once and reused for both columns.
xy <- st_coordinates(boulder)
boulder <- boulder %>%
  mutate(x = xy[, 1], y = xy[, 2])

# Plot the 2D kernel density of the point locations as filled contours.
# Only x and y enter the estimate, so the title describes point density,
# not PT_Elev. after_stat(level) replaces the deprecated `..level..` notation.
ggplot(data = boulder) +
  stat_density_2d(
    aes(x = x, y = y, fill = after_stat(level)),
    geom = "polygon"
  ) +
  scale_fill_viridis_c(option = "magma") +
  labs(title = "Density Map of Social Media Points in Boulder") +
  theme_minimal()

# Scatter plot of PT_Elev against Bike_dis, coloured by PT_Elev (inferno scale)
ggplot(boulder, aes(x = Bike_dis, y = PT_Elev, colour = PT_Elev)) +
  geom_point() +
  scale_colour_viridis_c(option = "inferno") +
  labs(
    title = "Bike Distance vs PT_Elev",
    x = "Bike Distance",
    y = "PT_Elev"
  ) +
  theme_minimal()

Relationship Between Elevation and Bike Trail Distance

Here, I use ggplot2 to create a scatter plot to explore the relationship between elevation (PT_Elev) and the distance to the nearest bike trail (Bike_dis).

# Scatter plot of elevation against distance to bike trails, drawn
# semi-transparently and coloured by elevation on the magma scale
ggplot(boulder, aes(x = Bike_dis, y = PT_Elev, colour = PT_Elev)) +
  geom_point(alpha = 0.6) +
  scale_colour_viridis_c(option = "magma") +
  labs(
    title = "Bike Distance vs Elevation",
    x = "Distance to Bike Trails (Bike_dis)",
    y = "Elevation (PT_Elev)"
  ) +
  theme_minimal()

# Histogram of PT_Elev using 30 bins
ggplot(data = boulder) +
  geom_histogram(
    mapping = aes(x = PT_Elev),
    bins = 30,
    fill = "skyblue",
    colour = "black"
  ) +
  labs(title = "Elevation Distribution in Boulder", x = "Elevation (PT_Elev)", y = "Frequency") +
  theme_minimal()

Analysis Summary

Elevation Distribution: The map highlights the variation in elevation across the Boulder area, with higher elevations concentrated in the southwest regions. The elevation distribution can also be seen in the histogram, and the scatter plot relates elevation to the distance to bike trails.